1 Tools

# Generate a noisy sine wave sampled at `n` evenly spaced points of [0, 2*pi].
#
# Args:
#   n:        number of samples.
#   amplitud: amplitude of the wave; it also scales the noise term.
#   phase:    phase offset in radians (default: 2*pi times one uniform draw).
#   velocity: multiplier applied to the sampling grid (default: one
#             exponential draw).
#   noise:    noise level relative to `amplitud`; the noise is standard normal.
#
# Returns: a numeric vector of length `n`.
randomTimeSeries <- function(n=20, amplitud=1, phase=2*pi*runif(1), velocity=rexp(1), noise=0.05) {
  # Force the lazy defaults in a fixed order so random draws happen as
  # amplitude -> phase -> velocity -> noise.
  force(amplitud)
  offset <- phase
  speed <- velocity
  grid <- seq(0, 2*pi, length.out=n)
  wave <- amplitud*sin(offset+speed*grid)
  wave+noise*amplitud*rnorm(n)
}

2 Main Packages

library(ggplot2)
library(igraph)
library(RColorBrewer)
library(reshape2)
library(data.table)
library(ROCR)
library(rplot)

3 Basic Plots

3.1 Time Series

# 100 noiseless sine series of 200 samples each, one series per column.
# vapply() returns the 200 x 100 matrix directly and checks that every series
# has the declared length, so no cbind(unlist(sapply(...))) wrapping is needed.
matrixTimeSeries <- vapply(
  seq_len(100),
  function(i) randomTimeSeries(200, velocity = 1, noise = 0),
  numeric(200)
)
rplot::r.plot(matrixTimeSeries)

# Same series against an explicit x grid on [0, 1]. The grid length is taken
# from the matrix so x and y cannot get out of sync.
rplot::r.plot(x = seq(0, 1, length.out = nrow(matrixTimeSeries)), y = matrixTimeSeries)

# Smaller example: 5 series of 50 samples, drawn and then overlaid with points.
matrixTimeSeries <- vapply(
  seq_len(5),
  function(i) randomTimeSeries(50, velocity = 1, noise = 0),
  numeric(50)
)
r.plot(matrixTimeSeries)
r.plot.add(matrixTimeSeries, type = "p")

3.2 Lines I

# Sine and cosine sampled on a shared grid over [0, 10] with step 0.1.
x <- seq(0, 10, 0.1)
y <- sin(x)
z <- cos(x)  # not drawn in this chunk; used by the two-curve plots below

r.plot(x, y, type = "l")

3.3 Lines II

# Empty frame with labelled axes, then both curves coloured via `icol`.
r.plot.new(xlim = c(0, 10), ylim = c(-1, 1), xlab = "x", ylab = "y")
r.plot.add(x, y, type = "l", icol = 1)
r.plot.add(x, z, type = "l", icol = 2)

# Same plot, colours given as numeric `col` values.
r.plot.new(xlim = c(0, 10), ylim = c(-1, 1))
r.plot.add(x, y, type = "l", col = 1)
r.plot.add(x, z, type = "l", col = 2)

# Same plot, colours given as explicit RGB values.
r.plot.new(xlim = c(0, 10), ylim = c(-1, 1))
r.plot.add(x, y, type = "l", col = rgb(0.8, 0.8, 0.0))
r.plot.add(x, z, type = "l", col = rgb(0.0, 0.8, 0.8))

3.4 Points I

# Three samples of 500 values: x and z uniform on [-0.5, 0.5], y normal with
# mean -0.5.
x <- runif(500) - 0.5
y <- rnorm(500) - 0.5
z <- runif(500) - 0.5

# First 100 values of y as points, joined by a faint line.
r.plot(y[1:100], type = "p")
r.plot.add(y[1:100], type = "l", col = rgb(0, 0, 0, 0.1))

# First 10 (x, y) pairs as points, joined by a faint line.
r.plot(x[1:10], y[1:10])
r.plot.add(x[1:10], y[1:10], type = "l", col = rgb(0, 0, 0, 0.1))

# Full scatter in one call ...
r.plot(x, y)

# ... and the same scatter built from an empty frame plus a layer.
r.plot.new(x, y)
r.plot.add(x, y)

# Two clouds in one frame. The frame is built from every value drawn below
# (y and z on the vertical axis), so both layers are covered.
r.plot.new(c(x, x), c(y, z))
r.plot.add(x, y)
r.plot.add(x, z, icol = 2)

3.5 Points II

# Two semi-transparent clouds, colours chosen with `icol`.
r.plot.new(c(x, x), c(y, z))
r.plot.add(x, y, icol = 1, alpha = 0.3)
r.plot.add(x, z, icol = 2, alpha = 0.3)

# Same clouds, colours given as numeric `col` values.
r.plot.new(c(x, x), c(y, z))
r.plot.add(x, y, col = 1, alpha = 0.3)
r.plot.add(x, z, col = 2, alpha = 0.3)

# Same clouds, colours given as explicit RGB values.
r.plot.new(c(x, x), c(y, z))
r.plot.add(x, y, col = rgb(1, 1, 0), alpha = 0.3)
r.plot.add(x, z, col = rgb(1, 0, 1), alpha = 0.3)

# A colour vector shorter than the data (5 colours for 500 points).
r.plot(x, y, col = heat.colors(5))

# Four large, semi-transparent points with a 5-colour terrain palette.
r.plot(1:4, 1:4, col = terrain.colors(5), cex = 20, alpha = 0.6)

4 Other Plots

4.1 Third Axis

# Two series on very different scales: sin(x) and exp(x).
x <- seq(0, 10, 0.5)
y <- sin(x)
z <- exp(x)

# Variant 1: start from an empty frame created with thirdAxis = TRUE.
r.plot.new(x, y, thirdAxis = TRUE, main = "Plot with 3rd Axis using new")
r.plot.add(x, y, type = "l")
r.plot.coord(x, z)       # called before drawing z, with the (x, z) data
r.plot.coord.axis(z)     # called with the z values only
r.plot.add(x, z, col = 2, type = "l")

# Variant 2: start from r.plot() and draw both lines and points per series.
r.plot(x, y, type = "l", thirdAxis = TRUE, main = "Plot with 3rd Axis")
r.plot.add(x, y, type = "p")
r.plot.coord(x, z)
r.plot.coord.axis(z)
r.plot.add(x, z, col = 2, type = "l")
r.plot.add(x, z, col = 2, type = "p")

4.2 Bar plot

# `var`: 100 rounded values of exp(U(0,1)).
# `t`:   cross-tabulation of two independent rounded uniform samples, with
#        both dimensions relabelled "Tipo A" / "Tipo B".
var <- round(exp(runif(100)))
t <- table(round(runif(100)), round(runif(100)))
rownames(t) <- c("Tipo A", "Tipo B")
colnames(t) <- c("Tipo A", "Tipo B")

# Bars from a raw vector, vertical then horizontal.
r.plot.bar(var)

r.plot.bar(var, horizontal = TRUE)

# Bars from the two-way table, combining `beside` and `horizontal`.
r.plot.bar(table = t)

r.plot.bar(table = t, beside = TRUE)

r.plot.bar(table = t, horizontal = TRUE)

r.plot.bar(table = t, beside = TRUE, horizontal = TRUE)

# Same layout with the `background` / `box` options toggled.
r.plot.bar(table = t, beside = TRUE, horizontal = TRUE, background = FALSE, box = FALSE)

r.plot.bar(table = t, beside = TRUE, horizontal = TRUE, background = TRUE, box = FALSE)

4.3 Histogram plot

# 2000 standard-normal draws, shown with default settings, with freq = FALSE
# and with 20 breaks.
x <- rnorm(2000)
r.plot.histogram(x)

r.plot.histogram(x, main = "Density", freq = FALSE)

r.plot.histogram(x, breaks = 20)

4.4 Distribution plot

# Uses whatever `x` currently holds (the histogram sample when run in order).
r.plot.distribution( x )

4.5 Heatmap

# Scattered points with z = squared distance to (0.7, 0.6).
x <- runif(5000)
y <- runif(5000)
z <- (x - 0.7)^2 + (y - 0.6)^2
r.plot.heatmap(x, y, z)

# Same data, passing `mean` as the fourth positional argument.
r.plot.heatmap(x, y, z, mean)

# Matrix input: the built-in volcano elevation grid.
data(volcano)
r.plot.heatmap(matrixData = volcano)

r.plot.heatmap(matrixData = volcano, palette = terrain.colors(12))

r.plot.heatmap(matrixData = volcano, contour = FALSE, palette = terrain.colors(12))

# Two normal samples with only x and y given, on a 30 x 30 grid.
r.plot.heatmap(
  rnorm(10000), rnorm(10000),
  xbreaks = 30, ybreaks = 30,
  contour = FALSE,
  palette = r.color.gradient.palette(c("white", r.color(1)))
)

# Same call with smooth = 0.8.
r.plot.heatmap(
  rnorm(10000), rnorm(10000),
  xbreaks = 30, ybreaks = 30,
  contour = FALSE,
  smooth = 0.8,
  palette = r.color.gradient.palette(c("white", r.color(1)))
)

4.6 Treemap

# Bin 1000 uniform draws into 10 intervals; each interval becomes a segment
# whose area is its count and whose colour value is random.
x <- runif(1000)
f <- cut(x, breaks = 10)
t <- table(f)
segmentosNames <- names(t)
segmentosGroup <- names(t)   # every segment is its own group
segmentosArea <- as.numeric(t)
segmentosColor <- runif(length(segmentosNames))

r.plot.treemap(
  segment = segmentosNames,
  segmentgroup = segmentosGroup,
  area = segmentosArea,
  color = segmentosColor,
  colorScaleLeft = rgb(0.8, 0.2, 0.8),
  colorScaleCenter = "White",
  colorScaleRight = rgb(0.2, 0.2, 0.8),
  main = "Tree Map"
)

4.7 Radial plot

# Two groups measured on five variables; the second profile is a noisy linear
# function of the first.
x1 <- runif(5)
x2 <- 0.2 + 0.6 * x1 + 0.2 * runif(5)
df <- data.frame(
  groupASD = c("Blue Collar Communities", "Prospering Suburbs"),
  matrix(c(x1, x2), nrow = 2, byrow = TRUE)
)
# Rename every column after the group label.
colnames(df)[-1] <- c("A", "B", "C", "D", "E")
r.plot.radial(df, legend = FALSE)

r.plot.radial(df)

4.8 Graph plot

# Dense adjacency matrix of the UKfaculty network (from the igraphdata package).
# as_adjacency_matrix() replaces the deprecated get.adjacency(); with
# sparse = FALSE it returns a base matrix, so no as.matrix() step is needed.
data(UKfaculty, package = "igraphdata")
x <- igraph::as_adjacency_matrix(UKfaculty, sparse = FALSE)

# A random 20 x 20 matrix for comparison.
mat <- matrix(runif(400), 20)
r.plot.heatmap(matrixData = mat, contour = FALSE)

# The random matrix as is, negated, and shifted by -0.5.
r.plot.matrix(mat)

r.plot.matrix(-mat)

r.plot.matrix(mat - 0.5)

# The adjacency matrix, then the result of r.plot.matrix.communities() on it.
r.plot.matrix(x)

r.plot.matrix.communities(x)

r.plot.matrix(r.plot.matrix.communities(x))

r.plot.heatmap(matrixData = r.plot.matrix.communities(x), contour = FALSE)

r.plot.graph.text(x, vertexLabelCex = 0.5, edgeWidthMax = 1.5)

5 Palette

5.1 Gradient Colors

# 10000 points in the square [-0.5, 0.5]^2, coloured from their squared
# distance to the origin.
x <- runif(10000) - 0.5
y <- runif(10000) - 0.5

r.plot(x, y, col = r.color.gradient(x^2 + y^2), alpha = 0.4)

# Same colouring with levels = 3 and larger points.
r.plot(x, y, col = r.color.gradient(x^2 + y^2, levels = 3), cex = 2)

5.2 Show and Set Palette

# Print the current palette, then display it.
r.palette.get()
##  [1] "#6673CCA6" "#CC3333A6" "#1AE61AA6" "#E68000A6" "#CC00CCA6"
##  [6] "#E0DB00A6" "#E60080A6" "#8000FFA6" "#80D900A6" "#0099E6A6"
## [11] "#666666A6" "#1A1A80A6" "#801A1AA6" "#006633A6" "#994D1AA6"
## [16] "#801A80A6" "#80801AA6" "#1A8080A6" "#00E680A6" "#333333A6"
r.palette.show()

# Display other colour vectors: base R palettes and a three-colour gradient.
r.palette.show(heat.colors(12))

r.palette.show(rainbow(12))

r.palette.show(r.color.gradient.palette(c("red", "blue", "green"), levels = 20))

# The same single large point drawn three times: before changing the palette,
# after setting a reversed rainbow palette with alpha 0.4, and after restoring.
r.plot(1, 1, type = "p", cex = 20)

r.setPalette(rev(rainbow(8)))
r.setColorAlpha(0.4)
r.plot(1, 1, type = "p", cex = 20)

r.palette.restore()
r.plot(1, 1, type = "p", cex = 20)

6 Data Analysis

6.1 Data exploration

# airquality with Month turned into a factor and Day replaced by a random
# factor drawn from 1..28.
df <- airquality
df[["Month"]] <- factor(df[["Month"]])
df[["Day"]] <- factor(sample(1:28, size = nrow(df), replace = TRUE))
r.plot.data(df)

# Typing the bare function name prints its source.
r.export.dataoverview
## function (dades, col = NULL, horizontal = FALSE, label.cex = 0.7, 
##     label.rotation = 45, legend = FALSE, legend.pos = "topright", 
##     legend.cex = 0.6, folder = "output/", filePrefix = "img_", 
##     filePrefixCont = "cont_", filePrefixCat = "cat_", fileSufix = "", 
##     width = 480, height = 480, ...) 
## {
##     if (missing(col)) 
##         col = r.palette.get()
##     for (icol in 1:ncol(dades)) {
##         strCol = names(dades)[icol]
##         values = dades[, icol]
##         if (is.numeric(values)) {
##             png(paste0(folder, filePrefix, filePrefixCont, strCol, 
##                 fileSufix, ".png"), width = width, height = height)
##             r.plot.histogram(values = values, col = col[1], main = strCol, 
##                 ...)
##             dev.off()
##         }
##         else {
##             png(paste0(folder, filePrefix, filePrefixCat, strCol, 
##                 fileSufix, ".png"), width = width, height = height)
##             r.plot.bar(values = values, col = col, horizontal = horizontal, 
##                 label.cex = label.cex, label.rotation = label.rotation, 
##                 legend = legend, legend.pos = legend.pos, legend.cex = legend.cex, 
##                 main = strCol, ...)
##             dev.off()
##         }
##     }
## }
## <environment: namespace:rplot>

6.2 Dimensionality Reduction

# 2D views of the four numeric iris columns (the nn variant uses the first
# 50 rows only).
r.plot2D.data(iris[, -5])

r.plot2D.pca(iris[, -5])

r.plot2D.nn(iris[1:50, -5])

# Same views with the k-means (k = 3) assignment passed as clustModel.
km <- kmeans(iris[, -5], centers = 3)
r.plot2D.data(iris[, -5], clustModel = km$cluster)

r.plot2D.pca(iris[, -5], clustModel = km$cluster)

# r.plot2D.nn(iris[1:50,-5], clustModel=km$cluster)
# Model clusters together with the real species labels (clustReal).
r.plot2D.data(iris[, -5], clustModel = km$cluster, clustReal = iris[, 5])

r.plot2D.pca(iris[, -5], clustModel = km$cluster, clustReal = iris[, 5])

# r.plot2D.nn(iris[,-5], clustModel=km$cluster, clustReal=iris[,5])

6.3 Binning

# Petal width against petal length.
r.plot(iris[["Petal.Width"]], iris[["Petal.Length"]])

# Typing the bare function name prints its source.
r.plot.burbujas
## function (datos, segmentacion, target, relativeToMean = FALSE, 
##     relativeToMeanX = FALSE, relativeToMeanY = FALSE, logScale = FALSE, 
##     logScaleX = FALSE, logScaleY = FALSE, laplaceSmooth = FALSE, 
##     showMean = FALSE, showMeanX = FALSE, showMeanY = FALSE, showTargetAbsMean = FALSE, 
##     sizeMin = 1, sizeMax = 3, xlab = "Vol.", ylab = "Freq.Rel.Target", 
##     ...) 
## {
##     require(rmodel)
##     segmentacionValidated = NaN
##     nelements = NaN
##     listValues = list()
##     for (ipos in 1:length(segmentacion)) {
##         segment = segmentacion[ipos]
##         var = datos[, segment]
##         values = unique(var)
##         listValues[[segment]] = values
##         nelements[ipos] = length(values)
##     }
##     pos = 1
##     elementsS = NaN
##     elementsV = NaN
##     finished = FALSE
##     index = rep(1, length(segmentacion))
##     while (!finished) {
##         ind = 1:nrow(datos)
##         for (ipos in 1:length(segmentacion)) {
##             segment = segmentacion[ipos]
##             ind = intersect(ind, which(datos[, segment] == listValues[[segment]][index[ipos]]))
##         }
##         if (length(ind) > 0) {
##             if (laplaceSmooth) {
##                 elementsS[pos] = (1 + length(which(datos[ind, 
##                   target] == 1)))/(1 + length(ind))
##                 elementsV[pos] = 1 + length(ind)
##                 pos = pos + 1
##             }
##             else {
##                 elementsS[pos] = length(which(datos[ind, target] == 
##                   1))/length(ind)
##                 elementsV[pos] = length(ind)
##                 pos = pos + 1
##             }
##         }
##         else if (laplaceSmooth) {
##             elementsS[pos] = 1
##             elementsV[pos] = 1
##             pos = pos + 1
##         }
##         ipos = 1
##         incrementFinished = FALSE
##         while (ipos <= length(segmentacion) && !incrementFinished) {
##             index[ipos] = index[ipos] + 1
##             if (index[ipos] > nelements[ipos]) {
##                 index[ipos] = 1
##                 ipos = ipos + 1
##             }
##             else {
##                 incrementFinished = TRUE
##             }
##         }
##         if (!incrementFinished) 
##             finished = TRUE
##     }
##     ind = 1:length(elementsS)
##     ind = intersect(ind, which(!is.na(elementsS)))
##     ind = intersect(ind, which(!is.infinite(elementsS)))
##     if (logScale) 
##         ind = intersect(ind, which(elementsS != 0))
##     burbujaS = elementsS[ind]
##     burbujaV = elementsV[ind]
##     meanS = mean(elementsS[ind])
##     meanV = mean(elementsV[ind])
##     if (relativeToMean) {
##         burbujaS = burbujaS/meanS
##         burbujaV = burbujaV/meanV
##     }
##     else {
##         if (relativeToMeanY) 
##             burbujaS = burbujaS/meanS
##         if (relativeToMeanX) 
##             burbujaV = burbujaV/meanV
##     }
##     if (logScale) {
##         burbujaS = log10(burbujaS)
##         burbujaV = log10(burbujaV)
##     }
##     else {
##         if (logScaleY) 
##             burbujaS = log10(burbujaS)
##         if (logScaleX) 
##             burbujaV = log10(burbujaV)
##     }
##     r.plot(x = burbujaV, y = burbujaS, cex = rmodel::r.normalize(x = elementsV[ind], 
##         imin = sizeMin, imax = sizeMax), type = "p", xlab = xlab, 
##         ylab = ylab, ...)
##     if (showMean) {
##         r.plot.add(x = c(meanV, meanV), y = c(-2 * (100 + max(abs(burbujaS))), 
##             2 * (100 + max(abs(burbujaS)))), type = "l", col = rgb(0, 
##             0, 0, 0.4))
##         r.plot.add(y = c(meanS, meanS), x = c(-2 * (100 + max(abs(burbujaV))), 
##             2 * (100 + max(abs(burbujaV)))), type = "l", col = rgb(0, 
##             0, 0, 0.4))
##     }
##     else {
##         if (showMeanX) 
##             r.plot.add(x = c(meanV, meanV), y = c(-2 * (100 + 
##                 max(abs(burbujaS))), 2 * (100 + max(abs(burbujaS)))), 
##                 type = "l", col = rgb(0, 0, 0, 0.4))
##         if (showMeanY) 
##             r.plot.add(y = c(meanS, meanS), x = c(-2 * (100 + 
##                 max(abs(burbujaV))), 2 * (100 + max(abs(burbujaV)))), 
##                 type = "l", col = rgb(0, 0, 0, 0.4))
##     }
##     if (showTargetAbsMean) {
##         meanTarget = rmodel::r.mean(datos[, target])
##         r.plot.add(y = c(meanTarget, meanTarget), x = c(-2 * 
##             (100 + max(abs(burbujaV))), 2 * (100 + max(abs(burbujaV)))), 
##             type = "l", col = rgb(1, 0, 0, 0.3))
##     }
## }
## <environment: namespace:rplot>
# Segment the rounded numeric iris columns, passing all four column names.
rmodel::r.segment(round(iris[, -5]), colnames(iris)[-5])
##   [1]   1   6   6  21   1  16   6  21   7  21  16  21   6   7   3  18   1
##  [18]   1  18  16  21  16   1  21  21  21  21  16   6  21  21  21  16   3
##  [35]  21   6   3   1   7  21   1  12   7 121  16   6  16   6  16   6 144
##  [52] 263 249 163 248 158 248 176 144 156 161 263 163 143 158 159 263 158
##  [69] 268 163 248 158 253 143 158 159 144 249 263 158 163 163 158 248 261
##  [86] 263 249 163 158 163 158 143 158 176 158 158 158 158 176 158 293 248
## [103] 294 293 293 310 266 294 299 289 248 248 294 253 248 248 293 305 310
## [120] 253 294 248 310 248 294 294 248 248 293 294 294 290 293 248 188 295
## [137] 293 293 248 249 294 249 248 294 294 249 253 248 248 248

6.4 K-means

# Fit k-means with 3 centres on the numeric iris columns and plot the result.
km <- kmeans(iris[, -5], centers = 3)
r.plot.kmeans.shapes(iris[, -5], km)

## K-means clustering with 3 clusters of sizes 38, 62, 50
## 
## Cluster means:
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1  6.850000000 3.073684211  5.742105263 2.071052632
## 2  5.901612903 2.748387097  4.393548387 1.433870968
## 3  5.006000000 3.428000000  1.462000000 0.246000000
## 
## Clustering vector:
##   [1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [36] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
##  [71] 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 1 1
## [106] 1 2 1 1 1 1 1 1 2 2 1 1 1 1 2 1 2 1 2 1 1 2 2 1 1 1 1 1 2 1 1 1 1 2 1
## [141] 1 1 2 1 1 1 2 1 1 2
## 
## Within cluster sum of squares by cluster:
## [1] 23.87947368 39.82096774 15.15100000
##  (between_SS / total_SS =  88.4 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"         "iter"        
## [9] "ifault"
# Same plot with paintCentroids = TRUE.
r.plot.kmeans.shapes(iris[, -5], km, paintCentroids = TRUE)

## K-means clustering with 3 clusters of sizes 38, 62, 50
## 
## Cluster means:
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1  6.850000000 3.073684211  5.742105263 2.071052632
## 2  5.901612903 2.748387097  4.393548387 1.433870968
## 3  5.006000000 3.428000000  1.462000000 0.246000000
## 
## Clustering vector:
##   [1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [36] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
##  [71] 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 1 1
## [106] 1 2 1 1 1 1 1 1 2 2 1 1 1 1 2 1 2 1 2 1 1 2 2 1 1 1 1 1 2 1 1 1 1 2 1
## [141] 1 1 2 1 1 1 2 1 1 2
## 
## Within cluster sum of squares by cluster:
## [1] 23.87947368 39.82096774 15.15100000
##  (between_SS / total_SS =  88.4 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"         "iter"        
## [9] "ifault"
# Smooth-shape variant for the same k-means fit.
r.plot.kmeans.smoothshapes(iris[, -5], km)

6.5 Model performance

# x: 1000 uniform scores.
# y: binary labels built in three segments: the first 200 follow x closely
#    (weight 0.8), the next 500 less so (weight 0.6), and the last 300 are
#    independent of x.
x <- runif(1000)
y <- c(
  round(0.8 * x[1:200] + 0.2 * runif(200)),
  round(0.6 * x[201:700] + 0.4 * runif(500)),
  round(runif(300))
)
rmodel::r.performance.metrics(x, y)
##                ACTUAL_0    ACTUAL_1   PREDICTED   PRECISION
## PREDICTED_0         398         107         505     0,78812
## PREDICTED_1         133         362         495     0,73131
## ACTUAL              531         469       1.000     0,75972
## RECALL          0,74953     0,77186     0,76069     0,76000
## 
##        F1     phi
## 1 0,76834 0,52041
## 2 0,75104 0,52041
## 3 0,75969 0,52041
## 
##                        score
## Accuracy                0,76
## Precision            0,73131
## Recall (sensitivity) 0,77186
## specificity          0,74953
## Score F1             0,75104
## Score G              0,75131
## Score Beta           0,75104
## Score Phi (MCC)      0,52041
# Confusion matrix for the scores x against the labels y.
r.plot.confusionmatrix(x, y)
##                ACTUAL_0    ACTUAL_1   PREDICTED   PRECISION
## PREDICTED_0         398         107         505     0,78812
## PREDICTED_1         133         362         495     0,73131
## ACTUAL              531         469       1.000     0,75972
## RECALL          0,74953     0,77186     0,76069     0,76000

# F1 plot; the call also prints an F1 / phi table.
r.plot.F1(x, y)
##        F1     phi
## 1 0,76834 0,52041
## 2 0,75104 0,52041
## 3 0,75969 0,52041

# ROC, gain and lift plots for the same scores and labels.
r.plot.roc(x, y)

r.plot.gain(x, y)

r.plot.lift(x, y)

7 Interactive

# Static scatter and its r.iplot counterpart, coloured by species.
r.plot(iris[["Sepal.Length"]], iris[["Sepal.Width"]], icol = iris[, 5])
r.iplot(iris[["Sepal.Length"]], iris[["Sepal.Width"]], icol = iris[, 5])
# r.iplot* versions of the k-means and 2D-projection plots.
r.iplot.kmeans.shapes(iris[, -5])
r.iplot.smoothkmeans(iris[, -5])
r.iplot2D.data(iris[, -5], clustReal = iris[, 5])
r.iplot2D.pca(iris[, -5], clustReal = iris[, 5])